#include <limits.h>
#include <setjmp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "regexp.h"

#define nelem(a) (sizeof (a) / sizeof (a)[0])

typedef unsigned int Rune;

static int isalpharune(Rune c) {
    /* TODO: Add unicode support */
    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}

static Rune toupperrune(Rune c) {
    /* TODO: Add unicode support */
    if (c >= 'a' && c <= 'z')
        return c - 'a' + 'A';
    return c;
}

static int chartorune(Rune *r, const char *s) {
    /* TODO: Add UTF-8 decoding */
    *r = *s;
    return 1;
}

#define REPINF 255
#define MAXSUB REG_MAXSUB
#define MAXPROG (32 << 10)

typedef struct Reclass Reclass;
typedef struct Renode Renode;
typedef struct Reinst Reinst;
typedef struct Rethread Rethread;

struct Reclass {
    Rune *end;
    Rune spans[64];
};

struct Reprog {
    Reinst *start, *end;
    int flags;
    unsigned int nsub;
    Reclass cclass[16];
};

static struct {
    Reprog *prog;
    Renode *pstart, *pend;

    const char *source;
    unsigned int ncclass;
    unsigned int nsub;
    Renode *sub[MAXSUB];

    int lookahead;
    Rune yychar;
    Reclass *yycc;
    int yymin, yymax;

    const char *error;
    jmp_buf kaboom;
} g;

static void die(const char *message) {
    g.error = message;
    longjmp(g.kaboom, 1);
}

static Rune canon(Rune c) {
    Rune u = toupperrune(c);
    if (c >= 128 && u < 128)
        return c;
    return u;
}

/* Scan */

enum {
    L_CHAR = 256,
    L_CCLASS,    /* character class */
    L_NCCLASS,    /* negative character class */
    L_NC,        /* "(?:" no capture */
    L_PLA,        /* "(?=" positive lookahead */
    L_NLA,        /* "(?!" negative lookahead */
    L_WORD,        /* "\b" word boundary */
    L_NWORD,    /* "\B" non-word boundary */
    L_REF,        /* "\1" back-reference */
    L_COUNT        /* {M,N} */
};

static int hex(int c) {
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
    if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
    die("invalid escape sequence");
    return 0;
}

static int dec(int c) {
    if (c >= '0' && c <= '9') return c - '0';
    die("invalid quantifier");
    return 0;
}

#define ESCAPES "BbDdSsWw^$\\.*+?()[]{}|0123456789"

static int isunicodeletter(int c) {
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || isalpharune(c);
}

static int nextrune(void) {
    g.source += chartorune(&g.yychar, g.source);
    if (g.yychar == '\\') {
        g.source += chartorune(&g.yychar, g.source);
        switch (g.yychar) {
            case 0:
                die("unterminated escape sequence");
                break;
            case 'f':
                g.yychar = '\f';
                return 0;
            case 'n':
                g.yychar = '\n';
                return 0;
            case 'r':
                g.yychar = '\r';
                return 0;
            case 't':
                g.yychar = '\t';
                return 0;
            case 'v':
                g.yychar = '\v';
                return 0;
            case 'c':
                g.yychar = (*g.source++) & 31;
                return 0;
            case 'x':
                g.yychar = hex(*g.source++) << 4;
                g.yychar += hex(*g.source++);
                if (g.yychar == 0) {
                    g.yychar = '0';
                    return 1;
                }
                return 0;
            case 'u':
                g.yychar = hex(*g.source++) << 12;
                g.yychar += hex(*g.source++) << 8;
                g.yychar += hex(*g.source++) << 4;
                g.yychar += hex(*g.source++);
                if (g.yychar == 0) {
                    g.yychar = '0';
                    return 1;
                }
                return 0;
        }
        if (strchr(ESCAPES, g.yychar))
            return 1;
        if (isunicodeletter(g.yychar) || g.yychar == '_') /* check identity escape */
            die("invalid escape character");
        return 0;
    }
    return 0;
}

static int lexcount(void) {
    g.yychar = *g.source++;

    g.yymin = dec(g.yychar);
    g.yychar = *g.source++;
    while (g.yychar != ',' && g.yychar != '}') {
        g.yymin = g.yymin * 10 + dec(g.yychar);
        g.yychar = *g.source++;
    }
    if (g.yymin >= REPINF)
        die("numeric overflow");

    if (g.yychar == ',') {
        g.yychar = *g.source++;
        if (g.yychar == '}') {
            g.yymax = REPINF;
        } else {
            g.yymax = dec(g.yychar);
            g.yychar = *g.source++;
            while (g.yychar != '}') {
                g.yymax = g.yymax * 10 + dec(g.yychar);
                g.yychar = *g.source++;
            }
            if (g.yymax >= REPINF)
                die("numeric overflow");
        }
    } else {
        g.yymax = g.yymin;
    }

    return L_COUNT;
}

static void newcclass(void) {
    if (g.ncclass >= nelem(g.prog->cclass))
    die("too many character classes");
    g.yycc = g.prog->cclass + g.ncclass++;
    g.yycc->end = g.yycc->spans;
}

static void addrange(Rune a, Rune b) {
    if (a > b)
        die("invalid character class range");
    if (g.yycc->end + 2 == g.yycc->spans + nelem(g.yycc->spans))
    die("too many character class ranges");
    *g.yycc->end++ = a;
    *g.yycc->end++ = b;
}

static void addranges_d(void) {
    addrange('0', '9');
}

static void addranges_D(void) {
    addrange(0, '0' - 1);
    addrange('9' + 1, 0xFFFF);
}

static void addranges_s(void) {
    addrange(0x9, 0x9);
    addrange(0xA, 0xD);
    addrange(0x20, 0x20);
    addrange(0xA0, 0xA0);
    addrange(0x2028, 0x2029);
    addrange(0xFEFF, 0xFEFF);
}

static void addranges_S(void) {
    addrange(0, 0x9 - 1);
    addrange(0x9 + 1, 0xA - 1);
    addrange(0xD + 1, 0x20 - 1);
    addrange(0x20 + 1, 0xA0 - 1);
    addrange(0xA0 + 1, 0x2028 - 1);
    addrange(0x2029 + 1, 0xFEFF - 1);
    addrange(0xFEFF + 1, 0xFFFF);
}

static void addranges_w(void) {
    addrange('0', '9');
    addrange('A', 'Z');
    addrange('_', '_');
    addrange('a', 'z');
}

static void addranges_W(void) {
    addrange(0, '0' - 1);
    addrange('9' + 1, 'A' - 1);
    addrange('Z' + 1, '_' - 1);
    addrange('_' + 1, 'a' - 1);
    addrange('z' + 1, 0xFFFF);
}

static int lexclass(void) {
    int type = L_CCLASS;
    int quoted, havesave, havedash;
    Rune save = 0;

    newcclass();

    quoted = nextrune();
    if (!quoted && g.yychar == '^') {
        type = L_NCCLASS;
        quoted = nextrune();
    }

    havesave = havedash = 0;
    for (;;) {
        if (g.yychar == 0)
            die("unterminated character class");
        if (!quoted && g.yychar == ']')
            break;

        if (!quoted && g.yychar == '-') {
            if (havesave) {
                if (havedash) {
                    addrange(save, '-');
                    havesave = havedash = 0;
                } else {
                    havedash = 1;
                }
            } else {
                save = '-';
                havesave = 1;
            }
        } else if (quoted && strchr("DSWdsw", g.yychar)) {
            if (havesave) {
                addrange(save, save);
                if (havedash)
                    addrange('-', '-');
            }
            switch (g.yychar) {
                case 'd':
                    addranges_d();
                    break;
                case 's':
                    addranges_s();
                    break;
                case 'w':
                    addranges_w();
                    break;
                case 'D':
                    addranges_D();
                    break;
                case 'S':
                    addranges_S();
                    break;
                case 'W':
                    addranges_W();
                    break;
            }
            havesave = havedash = 0;
        } else {
            if (quoted) {
                if (g.yychar == 'b')
                    g.yychar = '\b';
                else if (g.yychar == '0')
                    g.yychar = 0;
                /* else identity escape */
            }
            if (havesave) {
                if (havedash) {
                    addrange(save, g.yychar);
                    havesave = havedash = 0;
                } else {
                    addrange(save, save);
                    save = g.yychar;
                }
            } else {
                save = g.yychar;
                havesave = 1;
            }
        }

        quoted = nextrune();
    }

    if (havesave) {
        addrange(save, save);
        if (havedash)
            addrange('-', '-');
    }

    return type;
}

static int lex(void) {
    int quoted = nextrune();
    if (quoted) {
        switch (g.yychar) {
            case 'b':
                return L_WORD;
            case 'B':
                return L_NWORD;
            case 'd':
                newcclass();
                addranges_d();
                return L_CCLASS;
            case 's':
                newcclass();
                addranges_s();
                return L_CCLASS;
            case 'w':
                newcclass();
                addranges_w();
                return L_CCLASS;
            case 'D':
                newcclass();
                addranges_d();
                return L_NCCLASS;
            case 'S':
                newcclass();
                addranges_s();
                return L_NCCLASS;
            case 'W':
                newcclass();
                addranges_w();
                return L_NCCLASS;
            case '0':
                g.yychar = 0;
                return L_CHAR;
        }
        if (g.yychar >= '0' && g.yychar <= '9') {
            g.yychar -= '0';
            if (*g.source >= '0' && *g.source <= '9')
                g.yychar = g.yychar * 10 + *g.source++ - '0';
            return L_REF;
        }
        return L_CHAR;
    }

    switch (g.yychar) {
        case 0:
        case '$':
        case ')':
        case '*':
        case '+':
        case '.':
        case '?':
        case '^':
        case '|':
            return g.yychar;
    }

    if (g.yychar == '{')
        return lexcount();
    if (g.yychar == '[')
        return lexclass();
    if (g.yychar == '(') {
        if (g.source[0] == '?') {
            if (g.source[1] == ':') {
                g.source += 2;
                return L_NC;
            }
            if (g.source[1] == '=') {
                g.source += 2;
                return L_PLA;
            }
            if (g.source[1] == '!') {
                g.source += 2;
                return L_NLA;
            }
        }
        return '(';
    }

    return L_CHAR;
}

/* Parse */

enum {
    P_CAT, P_ALT, P_REP,
    P_BOL, P_EOL, P_WORD, P_NWORD,
    P_PAR, P_PLA, P_NLA,
    P_ANY, P_CHAR, P_CCLASS, P_NCCLASS,
    P_REF
};

struct Renode {
    unsigned char type;
    unsigned char ng, m, n;
    Rune c;
    Reclass *cc;
    Renode *x;
    Renode *y;
};

static Renode *newnode(int type) {
    Renode *node = g.pend++;
    node->type = type;
    node->cc = NULL;
    node->c = 0;
    node->ng = 0;
    node->m = 0;
    node->n = 0;
    node->x = node->y = NULL;
    return node;
}

static int empty(Renode *node) {
    if (!node) return 1;
    switch (node->type) {
        default:
            return 1;
        case P_CAT:
            return empty(node->x) && empty(node->y);
        case P_ALT:
            return empty(node->x) || empty(node->y);
        case P_REP:
            return empty(node->x) || node->m == 0;
        case P_PAR:
            return empty(node->x);
        case P_REF:
            return empty(node->x);
        case P_ANY:
        case P_CHAR:
        case P_CCLASS:
        case P_NCCLASS:
            return 0;
    }
}

static Renode *newrep(Renode *atom, int ng, int min, int max) {
    Renode *rep = newnode(P_REP);
    if (max == REPINF && empty(atom))
        die("infinite loop matching the empty string");
    rep->ng = ng;
    rep->m = min;
    rep->n = max;
    rep->x = atom;
    return rep;
}

static void next(void) {
    g.lookahead = lex();
}

static int accept(int t) {
    if (g.lookahead == t) {
        next();
        return 1;
    }
    return 0;
}

static Renode *parsealt(void);

static Renode *parseatom(void) {
    Renode *atom;
    if (g.lookahead == L_CHAR) {
        atom = newnode(P_CHAR);
        atom->c = g.yychar;
        next();
        return atom;
    }
    if (g.lookahead == L_CCLASS) {
        atom = newnode(P_CCLASS);
        atom->cc = g.yycc;
        next();
        return atom;
    }
    if (g.lookahead == L_NCCLASS) {
        atom = newnode(P_NCCLASS);
        atom->cc = g.yycc;
        next();
        return atom;
    }
    if (g.lookahead == L_REF) {
        atom = newnode(P_REF);
        if (g.yychar == 0 || g.yychar > g.nsub || !g.sub[g.yychar])
            die("invalid back-reference");
        atom->n = g.yychar;
        atom->x = g.sub[g.yychar];
        next();
        return atom;
    }
    if (accept('.'))
        return newnode(P_ANY);
    if (accept('(')) {
        atom = newnode(P_PAR);
        if (g.nsub == MAXSUB)
            die("too many captures");
        atom->n = g.nsub++;
        atom->x = parsealt();
        g.sub[atom->n] = atom;
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    if (accept(L_NC)) {
        atom = parsealt();
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    if (accept(L_PLA)) {
        atom = newnode(P_PLA);
        atom->x = parsealt();
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    if (accept(L_NLA)) {
        atom = newnode(P_NLA);
        atom->x = parsealt();
        if (!accept(')'))
            die("unmatched '('");
        return atom;
    }
    die("syntax error");
    return NULL;
}

static Renode *parserep(void) {
    Renode *atom;

    if (accept('^')) return newnode(P_BOL);
    if (accept('$')) return newnode(P_EOL);
    if (accept(L_WORD)) return newnode(P_WORD);
    if (accept(L_NWORD)) return newnode(P_NWORD);

    atom = parseatom();
    if (g.lookahead == L_COUNT) {
        int min = g.yymin, max = g.yymax;
        next();
        if (max < min)
            die("invalid quantifier");
        return newrep(atom, accept('?'), min, max);
    }
    if (accept('*')) return newrep(atom, accept('?'), 0, REPINF);
    if (accept('+')) return newrep(atom, accept('?'), 1, REPINF);
    if (accept('?')) return newrep(atom, accept('?'), 0, 1);
    return atom;
}

static Renode *parsecat(void) {
    Renode *cat, *head, **tail;
    if (g.lookahead && g.lookahead != '|' && g.lookahead != ')') {
        /* Build a right-leaning tree by splicing in new 'cat' at the tail. */
        head = parserep();
        tail = &head; // **(head)
        while (g.lookahead && g.lookahead != '|' && g.lookahead != ')') {
            cat = newnode(P_CAT);
            cat->x = *tail;
            cat->y = parserep();
            *tail = cat;
            tail = &cat->y;
        }
        return head;
    }
    return NULL;
}

static Renode *parsealt(void) {
    Renode *alt, *x;
    alt = parsecat();
    while (accept('|')) {
        x = alt;
        alt = newnode(P_ALT);
        alt->x = x;
        alt->y = parsecat();
    }
    return alt;
}

/* Compile */

enum {
    I_END, I_JUMP, I_SPLIT, I_PLA, I_NLA,
    I_ANYNL, I_ANY, I_CHAR, I_CCLASS, I_NCCLASS, I_REF,
    I_BOL, I_EOL, I_WORD, I_NWORD,
    I_LPAR, I_RPAR
};

struct Reinst {
    unsigned char opcode;
    unsigned char n;
    Rune c;
    Reclass *cc;
    Reinst *x;
    Reinst *y;
};

static unsigned int count(Renode *node) {
    unsigned int min, max, n;
    if (!node) return 0;
    switch (node->type) {
        default:
            return 1;
        case P_CAT:
            return count(node->x) + count(node->y);
        case P_ALT:
            return count(node->x) + count(node->y) + 2;
        case P_REP:
            min = node->m;
            max = node->n;
            if (min == max) n = count(node->x) * min;
            else if (max < REPINF) n = count(node->x) * max + (max - min);
            else n = count(node->x) * (min + 1) + 2;
            if (n > MAXPROG) die("program too large");
            return n;
        case P_PAR:
            return count(node->x) + 2;
        case P_PLA:
            return count(node->x) + 2;
        case P_NLA:
            return count(node->x) + 2;
    }
}

static Reinst *emit(Reprog *prog, int opcode) {
    Reinst *inst = prog->end++;
    inst->opcode = opcode;
    inst->n = 0;
    inst->c = 0;
    inst->cc = NULL;
    inst->x = inst->y = NULL;
    return inst;
}

static void compile(Reprog *prog, Renode *node) {
    Reinst *inst, *split, *jump;
    unsigned int i;

    if (!node)
        return;

    loop:
    switch (node->type) {
        case P_CAT:
            compile(prog, node->x);
            node = node->y;
            goto loop;

        case P_ALT:
            split = emit(prog, I_SPLIT);
            compile(prog, node->x);
            jump = emit(prog, I_JUMP);
            compile(prog, node->y);
            split->x = split + 1;
            split->y = jump + 1;
            jump->x = prog->end;
            break;

        case P_REP:
            for (i = 0; i < node->m; ++i) {
                inst = prog->end;
                compile(prog, node->x);
            }
            if (node->m == node->n)
                break;
            if (node->n < REPINF) {
                for (i = node->m; i < node->n; ++i) {
                    split = emit(prog, I_SPLIT);
                    compile(prog, node->x);
                    if (node->ng) {
                        split->y = split + 1;
                        split->x = prog->end;
                    } else {
                        split->x = split + 1;
                        split->y = prog->end;
                    }
                }
            } else if (node->m == 0) {
                split = emit(prog, I_SPLIT);
                compile(prog, node->x);
                jump = emit(prog, I_JUMP);
                if (node->ng) {
                    split->y = split + 1;
                    split->x = prog->end;
                } else {
                    split->x = split + 1;
                    split->y = prog->end;
                }
                jump->x = split;
            } else {
                split = emit(prog, I_SPLIT);
                if (node->ng) {
                    split->y = inst;
                    split->x = prog->end;
                } else {
                    split->x = inst;
                    split->y = prog->end;
                }
            }
            break;

        case P_BOL:
            emit(prog, I_BOL);
            break;
        case P_EOL:
            emit(prog, I_EOL);
            break;
        case P_WORD:
            emit(prog, I_WORD);
            break;
        case P_NWORD:
            emit(prog, I_NWORD);
            break;

        case P_PAR:
            inst = emit(prog, I_LPAR);
            inst->n = node->n;
            compile(prog, node->x);
            inst = emit(prog, I_RPAR);
            inst->n = node->n;
            break;
        case P_PLA:
            split = emit(prog, I_PLA);
            compile(prog, node->x);
            emit(prog, I_END);
            split->x = split + 1;
            split->y = prog->end;
            break;
        case P_NLA:
            split = emit(prog, I_NLA);
            compile(prog, node->x);
            emit(prog, I_END);
            split->x = split + 1;
            split->y = prog->end;
            break;

        case P_ANY:
            emit(prog, I_ANY);
            break;
        case P_CHAR:
            inst = emit(prog, I_CHAR);
            inst->c = (prog->flags & REG_ICASE) ? canon(node->c) : node->c;
            break;
        case P_CCLASS:
            inst = emit(prog, I_CCLASS);
            inst->cc = node->cc;
            break;
        case P_NCCLASS:
            inst = emit(prog, I_NCCLASS);
            inst->cc = node->cc;
            break;
        case P_REF:
            inst = emit(prog, I_REF);
            inst->n = node->n;
            break;
    }
}

#ifdef TEST

static void dumpnode(Renode *node) {
    Rune *p;
    if (!node) {
        printf("Empty");
        return;
    }
    switch (node->type) {
        case P_CAT:
            printf("Cat(");
            dumpnode(node->x);
            printf(", ");
            dumpnode(node->y);
            printf(")");
            break;
        case P_ALT:
            printf("Alt(");
            dumpnode(node->x);
            printf(", ");
            dumpnode(node->y);
            printf(")");
            break;
        case P_REP:
            printf(node->ng ? "NgRep(%d,%d," : "Rep(%d,%d,", node->m, node->n);
            dumpnode(node->x);
            printf(")");
            break;
        case P_BOL:
            printf("Bol");
            break;
        case P_EOL:
            printf("Eol");
            break;
        case P_WORD:
            printf("Word");
            break;
        case P_NWORD:
            printf("NotWord");
            break;
        case P_PAR:
            printf("Par(%d,", node->n);
            dumpnode(node->x);
            printf(")");
            break;
        case P_PLA:
            printf("PLA(");
            dumpnode(node->x);
            printf(")");
            break;
        case P_NLA:
            printf("NLA(");
            dumpnode(node->x);
            printf(")");
            break;
        case P_ANY:
            printf("Any");
            break;
        case P_CHAR:
            printf("Char(%c)", node->c);
            break;
        case P_CCLASS:
            printf("Class(");
            for (p = node->cc->spans; p < node->cc->end; p += 2) printf("%02X-%02X,", p[0], p[1]);
            printf(")");
            break;
        case P_NCCLASS:
            printf("NotClass(");
            for (p = node->cc->spans; p < node->cc->end; p += 2) printf("%02X-%02X,", p[0], p[1]);
            printf(")");
            break;
        case P_REF:
            printf("Ref(%d)", node->n);
            break;
    }
}

static void dumpprog(Reprog *prog) {
    Reinst *inst;
    int i;
    for (i = 0, inst = prog->start; inst < prog->end; ++i, ++inst) {
        printf("% 5d: ", i);
        switch (inst->opcode) {
            case I_END:
                puts("end");
                break;
            case I_JUMP:
                printf("jump %d\n", (int) (inst->x - prog->start));
                break;
            case I_SPLIT:
                printf("split %d %d\n", (int) (inst->x - prog->start), (int) (inst->y - prog->start));
                break;
            case I_PLA:
                printf("pla %d %d\n", (int) (inst->x - prog->start), (int) (inst->y - prog->start));
                break;
            case I_NLA:
                printf("nla %d %d\n", (int) (inst->x - prog->start), (int) (inst->y - prog->start));
                break;
            case I_ANY:
                puts("any");
                break;
            case I_ANYNL:
                puts("anynl");
                break;
            case I_CHAR:
                printf(inst->c >= 32 && inst->c < 127 ? "char '%c'\n" : "char U+%04X\n", inst->c);
                break;
            case I_CCLASS:
                puts("cclass");
                break;
            case I_NCCLASS:
                puts("ncclass");
                break;
            case I_REF:
                printf("ref %d\n", inst->n);
                break;
            case I_BOL:
                puts("bol");
                break;
            case I_EOL:
                puts("eol");
                break;
            case I_WORD:
                puts("word");
                break;
            case I_NWORD:
                puts("nword");
                break;
            case I_LPAR:
                printf("lpar %d\n", inst->n);
                break;
            case I_RPAR:
                printf("rpar %d\n", inst->n);
                break;
        }
    }
}

#endif

Reprog *regcomp(const char *pattern, int cflags, const char **errorp) {
    Renode *node;
    Reinst *split, *jump;
    int i, n;

    g.pstart = NULL;
    g.prog = NULL;

    if (setjmp(g.kaboom)) {
        if (errorp) *errorp = g.error;
        free(g.pstart);
        free(g.prog);
        return NULL;
    }

    g.prog = malloc(sizeof(Reprog));
    if (!g.prog)
        die("cannot allocate regular expression");
    n = strlen(pattern) * 2;
    if (n > 0) {
        g.pstart = g.pend = malloc(sizeof(Renode) * n);
        if (!g.pstart)
            die("cannot allocate regular expression parse list");
    }

    g.source = pattern;
    g.ncclass = 0;
    g.nsub = 1;
    for (i = 0; i < MAXSUB; ++i)
        g.sub[i] = 0;

    g.prog->flags = cflags;

    next();
    node = parsealt();
    if (g.lookahead == ')')
        die("unmatched ')'");
    if (g.lookahead != 0)
        die("syntax error");

#ifdef TEST
    dumpnode(node);
    putchar('\n');
#endif

    n = 6 + count(node);
    if (n < 0 || n > MAXPROG)
        die("program too large");

    g.prog->nsub = g.nsub;
    g.prog->start = g.prog->end = malloc(n * sizeof(Reinst));

    split = emit(g.prog, I_SPLIT);
    split->x = split + 3;
    split->y = split + 1;
    emit(g.prog, I_ANYNL);
    jump = emit(g.prog, I_JUMP);
    jump->x = split;
    emit(g.prog, I_LPAR);
    compile(g.prog, node);
    emit(g.prog, I_RPAR);
    emit(g.prog, I_END);

#ifdef TEST
    dumpprog(g.prog);
#endif

    free(g.pstart);

    if (errorp) *errorp = NULL;
    return g.prog;
}

void regfree(Reprog *prog) {
    if (prog) {
        free(prog->start);
        free(prog);
    }
}

/* Match */

static int isnewline(int c) {
    return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}

static int iswordchar(int c) {
    return c == '_' ||
           (c >= 'a' && c <= 'z') ||
           (c >= 'A' && c <= 'Z') ||
           (c >= '0' && c <= '9');
}

static int incclass(Reclass *cc, Rune c) {
    Rune *p;
    for (p = cc->spans; p < cc->end; p += 2)
        if (p[0] <= c && c <= p[1])
            return 1;
    return 0;
}

static int incclasscanon(Reclass *cc, Rune c) {
    Rune *p, r;
    for (p = cc->spans; p < cc->end; p += 2)
        for (r = p[0]; r <= p[1]; ++r)
            if (c == canon(r))
                return 1;
    return 0;
}

static int strncmpcanon(const char *a, const char *b, unsigned int n) {
    Rune ra, rb;
    int c;
    while (n--) {
        if (!*a) return -1;
        if (!*b) return 1;
        a += chartorune(&ra, a);
        b += chartorune(&rb, b);
        c = canon(ra) - canon(rb);
        if (c)
            return c;
    }
    return 0;
}

static int match(Reinst *pc, const char *sp, const char *bol, int flags, Resub *out) {
    Resub scratch;
    int i;
    Rune c;

    for (;;) {
        switch (pc->opcode) {
            case I_END:
                return 1;
            case I_JUMP:
                pc = pc->x;
                break;
            case I_SPLIT:
                scratch = *out;
                if (match(pc->x, sp, bol, flags, &scratch)) {
                    *out = scratch;
                    return 1;
                }
                pc = pc->y;
                break;

            case I_PLA:
                if (!match(pc->x, sp, bol, flags, out))
                    return 0;
                pc = pc->y;
                break;
            case I_NLA:
                scratch = *out;
                if (match(pc->x, sp, bol, flags, &scratch))
                    return 0;
                pc = pc->y;
                break;

            case I_ANYNL:
                sp += chartorune(&c, sp);
                if (c == 0)
                    return 0;
                pc = pc + 1;
                break;
            case I_ANY:
                sp += chartorune(&c, sp);
                if (c == 0)
                    return 0;
                if (isnewline(c))
                    return 0;
                pc = pc + 1;
                break;
            case I_CHAR:
                sp += chartorune(&c, sp);
                if (c == 0)
                    return 0;
                if (flags & REG_ICASE)
                    c = canon(c);
                if (c != pc->c)
                    return 0;
                pc = pc + 1;
                break;
            case I_CCLASS:
                sp += chartorune(&c, sp);
                if (c == 0)
                    return 0;
                if (flags & REG_ICASE) {
                    if (!incclasscanon(pc->cc, canon(c)))
                        return 0;
                } else {
                    if (!incclass(pc->cc, c))
                        return 0;
                }
                pc = pc + 1;
                break;
            case I_NCCLASS:
                sp += chartorune(&c, sp);
                if (c == 0)
                    return 0;
                if (flags & REG_ICASE) {
                    if (incclasscanon(pc->cc, canon(c)))
                        return 0;
                } else {
                    if (incclass(pc->cc, c))
                        return 0;
                }
                pc = pc + 1;
                break;
            case I_REF:
                i = out->sub[pc->n].ep - out->sub[pc->n].sp;
                if (flags & REG_ICASE) {
                    if (strncmpcanon(sp, out->sub[pc->n].sp, i))
                        return 0;
                } else {
                    if (strncmp(sp, out->sub[pc->n].sp, i))
                        return 0;
                }
                if (i > 0)
                    sp += i;
                pc = pc + 1;
                break;

            case I_BOL:
                if (sp == bol && !(flags & REG_NOTBOL)) {
                    pc = pc + 1;
                    break;
                }
                if (flags & REG_NEWLINE) {
                    if (sp > bol && isnewline(sp[-1])) {
                        pc = pc + 1;
                        break;
                    }
                }
                return 0;
            case I_EOL:
                if (*sp == 0) {
                    pc = pc + 1;
                    break;
                }
                if (flags & REG_NEWLINE) {
                    if (isnewline(*sp)) {
                        pc = pc + 1;
                        break;
                    }
                }
                return 0;
            case I_WORD:
                i = sp > bol && iswordchar(sp[-1]);
                i ^= iswordchar(sp[0]);
                if (!i)
                    return 0;
                pc = pc + 1;
                break;
            case I_NWORD:
                i = sp > bol && iswordchar(sp[-1]);
                i ^= iswordchar(sp[0]);
                if (i)
                    return 0;
                pc = pc + 1;
                break;

            case I_LPAR:
                out->sub[pc->n].sp = sp;
                pc = pc + 1;
                break;
            case I_RPAR:
                out->sub[pc->n].ep = sp;
                pc = pc + 1;
                break;
            default:
                return 0;
        }
    }
}

int regexec(Reprog *prog, const char *sp, Resub *sub, int eflags) {
    Resub scratch;
    int i;

    if (!sub)
        sub = &scratch;

    sub->nsub = prog->nsub;
    for (i = 0; i < MAXSUB; ++i)
        sub->sub[i].sp = sub->sub[i].ep = NULL;

    return !match(prog->start, sp, sp, prog->flags | eflags, sub);
}

#ifdef TEST

int main(int argc, char **argv) {
    const char *error;
    const char *s;
    Reprog *p;
    Resub m;
    unsigned int i;

//    void *temp = "\\..+$";
//    p = (Reprog *) &temp;
//    p = regcomp("\\..+$", 0, &error);

//    p = regcomp(".+\\/(.+\\..+)$", 0, &error);
//    s = "/root/temp/hello.mp3";

    p = regcomp("\\B..", 0, &error);
    s = "noonday";

    printf("nsub = %d\n", p->nsub);
    if (!regexec(p, s, &m, 0)) {
        for (i = 0; i < m.nsub; ++i) {
            int n = m.sub[i].ep - m.sub[i].sp;
            if (n > 0)
                printf("match %d: s=%d e=%d n=%d '%.*s'\n", i, (int) (m.sub[i].sp - s), (int) (m.sub[i].ep - s), n, n,
                       m.sub[i].sp);
            else
                printf("match %d: n=0 ''\n", i);
        }
    } else {
        printf("no match\n");
    }

    return 0;

//    if (argc > 1) {
//        p = regcomp(argv[1], 0, &error);
//        if (!p) {
//            fprintf(stderr, "regcomp: %s\n", error);
//            return 1;
//        }
//
//        if (argc > 2) {
//            s = argv[2];
//            printf("nsub = %d\n", p->nsub);
//            if (!regexec(p, s, &m, 0)) {
//                for (i = 0; i < m.nsub; ++i) {
//                    int n = m.sub[i].ep - m.sub[i].sp;
//                    if (n > 0)
//                        printf("match %d: s=%d e=%d n=%d '%.*s'\n", i, (int) (m.sub[i].sp - s), (int) (m.sub[i].ep - s),
//                               n, n, m.sub[i].sp);
//                    else
//                        printf("match %d: n=0 ''\n", i);
//                }
//            } else {
//                printf("no match\n");
//            }
//        }
//    }
//
//    return 0;
}

#endif
